## Clayton, Horrillo, and Sniderman 
## The BIAT and the AMP As Measures of Racial Prejudice in Political Science: A Methodological Assessment
## Step 2: Read and clean time series data

# Initial settings --------------------------------------------------------

# The global environment is intentionally left untouched: rm(list = ls()) would
# delete whatever objects the caller has in their session when this file is
# source()d, without giving a clean session (attached packages and options
# persist). Every object used below is created in this script, so run it in a
# fresh R session (e.g. via Rscript) for a reproducible start.
library(tidyverse)
library(readstata13)
library(haven)


# Read data and calculate AMP scores  -------------------------------------

# Read the ANES 2008 time series file and keep the respondent id (V080001)
# plus the variables in the range V085311:V085360b.
anes.amp <- read.dta13("data/anes_timeseries_2008.dta") %>%
  select(V080001, V085311:V085360b) %>%
  # V085360a == 1: exclude respondents with a visual impairment that prevented
  # them from completing the task (rows where V085360a is NA are dropped too).
  filter(V085360a == 1) %>%
  # Drop every variable whose name contains an "a", "b" or "c". Like
  # contains(), matches() ignores case by default, so this is the same set of
  # columns as three separate contains() calls.
  select(-matches("[abc]")) %>%
  select(-V085359)

# Treat every negative code as missing, consistent with how the other ANES
# variables are cleaned further down in this script. Previously all negative
# values were recoded for V085311 and V085335 only, and just the code -5 for
# the remaining columns, so any other negative code would have been averaged
# into the respondent means as if it were a valid response.
anes.amp[anes.amp < 0] <- NA

# Set the respondent id aside and remove it from the data frame, so that the
# positional column selections that follow are not offset by the id column.
caseid <- anes.amp[["V080001"]]

anes.amp <- select(anes.amp, -V080001)

# Per-respondent mean response over each set of trials. Columns are picked by
# position: 1-24 are treated as the black-face trials and 25-48 as the
# white-face trials, so this relies on the column order left by the cleaning
# steps above. Missing responses are ignored; a respondent with no valid
# response in a set gets NaN for that set.
black <- rowMeans(select(anes.amp, 1:24), na.rm = TRUE)

white <- rowMeans(select(anes.amp, 25:48), na.rm = TRUE)

# Put the case id back next to the two means, one row per respondent.
# cbind() builds a matrix first, so the three columns share a single type.
with.ids <- as.data.frame(
  cbind(caseid = caseid, black.mean = black, white.mean = white)
)

# diff:    white mean minus black mean.
# cheater: 1 when both means are exactly 0 or both are exactly 1, i.e. the
#          respondent gave the same answer on every non-missing trial;
#          0 otherwise, NA when the comparison cannot be evaluated.
with.ids <- with.ids %>%
  mutate(
    diff = white.mean - black.mean,
    cheater = as.numeric(
      (black.mean == 0 & white.mean == 0) |
        (black.mean == 1 & white.mean == 1)
    )
  )

# Clean ANES data with additional variables -------------------------------

# Read the same Stata file again, this time keeping the respondent id, the two
# variables used for the race filter, and the covariates used later on.
# The id and V085044a are renamed to caseid and vote on selection.
anes.others <- read.dta13("data/anes_timeseries_2008.dta") %>%
  select(
    caseid = V080001,
    V083251a, V083251b,
    V085174a, V085174b,
    V085175a, V085175b,
    V085065c, V085064y,
    vote = V085044a
  ) %>%
  # white only (V083251a is 50 and V083251b is -1)
  filter(V083251a == 50, V083251b == -1) %>%
  # NOTE(review): as.numeric() on a factor returns the integer level codes,
  # not the underlying Stata values -- confirm that any column read.dta13()
  # returns as a factor is meant to be handled this way.
  mutate_if(is.factor, as.numeric) %>%
  # the race variables are only needed for the filter above
  select(-V083251a, -V083251b)

# Negative codes are treated as missing in every remaining column.
anes.others[anes.others < 0] <- NA

# Merge everything --------------------------------------------------------

# Attach the AMP scores to the covariates by case id. The left join keeps
# every row of anes.others; rows that end up without an AMP difference score
# (no match in with.ids, or a missing/NaN diff) are dropped right afterwards.
# obama is 1 when vote equals 1 and 0 otherwise (NA when vote is missing);
# the three *diff variables are within-respondent differences between the
# paired covariates.
all.merged <- anes.others %>%
  left_join(with.ids, by = "caseid") %>%
  filter(!is.na(diff)) %>%
  mutate(
    obama = as.numeric(vote == 1),
    intelldiff = V085175a - V085175b,
    lazydiff = V085174a - V085174b,
    thermdiff = V085065c - V085064y
  )

# Analysis file: only the five variables below, complete cases only, and
# without the respondents flagged as cheaters.
all.merged.final <- all.merged %>%
  select(diff, thermdiff, lazydiff, intelldiff, cheater) %>%
  drop_na() %>%
  filter(cheater == 0)

# Obama/AMP correlation ---------------------------------------------------

# Correlation between the AMP difference score and the Obama-vote indicator,
# using only rows where both are observed. The value is printed explicitly:
# a bare top-level expression is auto-printed in an interactive session or
# under Rscript, but is silently discarded when the file is run with source().
# NOTE(review): this uses all.merged, which still contains the respondents
# flagged as cheaters (they are only removed from all.merged.final) -- confirm
# that this is intended.
print(cor(all.merged$diff, all.merged$obama, use = "na.or.complete"))


# Save data ---------------------------------------------------------------

# Make sure the target directory exists; write.csv() cannot create it and
# would otherwise fail on a fresh checkout. Nothing happens if it is already
# there.
dir.create("output", showWarnings = FALSE, recursive = TRUE)

# Row names are written as the first (unnamed) column, which is the
# write.csv() default; the file layout is unchanged for downstream steps.
write.csv(all.merged.final, "output/main_time.csv")
